idxstats_exogenousrna_dir <-
"results/samtools_idxstats/exogenous_rna/"
idxstats_human_dir <-
"results/samtools_idxstats/Homo_sapiens.GRCh38.dna.primary_assembly/"
bowtie2_human_logs <-
"results/logs/bowtie2/Homo_sapiens.GRCh38.dna.primary_assembly/"
idxstats <- tibble ()
for (row in seq_len (nrow (sample_units))) {
sample <- sample_units[row, ]$ sample_unit
# Read `idsxstats` for exogenous mapped reads
exogenous_rna_stats <- read_tsv (
file.path (idxstats_exogenousrna_dir, sprintf ("%s.bam.idxstats" , sample)),
col_names = c (
"sequence_name" , "sequence_length" ,
"mapped_reads" , "unmapped_reads"
),
col_types = "ciii"
)
exogenous_rna_mapped_reads <- exogenous_rna_stats %>%
dplyr:: filter (! sequence_name %in% c ("*" )) %>%
dplyr:: select (sequence_name, mapped_reads) %>%
dplyr:: mutate (sample = sample)
# Read `idxstats` for human mapped reads
human_stats <- read_tsv (
file.path (idxstats_human_dir, sprintf ("%s.bam.idxstats" , sample)),
col_names = c (
"sequence_name" , "sequence_length" ,
"mapped_reads" , "unmapped_reads"
),
col_types = "ciii"
)
grch38_mapped_reads <- human_stats %>%
dplyr:: filter (! sequence_name %in% c ("*" )) %>%
dplyr:: select (mapped_reads) %>%
sum ()
grch38_mapped_reads <- tibble (
sequence_name = "grch38_mapped_reads" ,
mapped_reads = grch38_mapped_reads,
sample = sample
)
# Read bowtie2 logs for unmapped reads
bowtie2_log <- readLines (
file.path (bowtie2_human_logs, sprintf ("%s.log" , sample))
)
total_pairs <- strtoi (str_split (bowtie2_log[1 ], " " )[[1 ]][1 ])
total_reads <- total_pairs * 2
unmapped_reads <- tibble (
sequence_name = "unmapped" ,
mapped_reads = total_reads - grch38_mapped_reads$ mapped_reads,
sample = sample
)
# Consolidate counts for rows
idxstats <- rbind (
idxstats,
exogenous_rna_mapped_reads,
grch38_mapped_reads,
unmapped_reads
)
}
idxstats